import pandas as pd
import traceback

# Console display settings for pandas, one option per call.
pd.set_option('display.max_rows', None)  # no limit on the number of rows printed
pd.set_option('display.max_columns', None)  # no limit on the number of columns printed
pd.set_option('display.max_colwidth', 1000)  # show up to 1000 characters per cell
pd.set_option('display.expand_frame_repr', False)  # do not wrap a wide frame across several blocks


def get_trans_maps(xtrans_path):
    """Load a two-way lookup table from a text file of ``left>right`` lines.

    Each non-blank line must contain exactly one ``>``. The text before it
    becomes a key of the first dictionary (mapped to the text after it), and
    the text after it becomes a key of the second dictionary (mapped back to
    the text before it). Blank lines are ignored. If the same key appears
    more than once, the last occurrence wins.

    Args:
        xtrans_path: Path of the UTF-8 encoded translation file.

    Returns:
        A tuple ``(en2zh, zh2en)``: the left-to-right and the right-to-left
        dictionaries.

    Raises:
        ValueError: If a non-blank line does not contain exactly one ``>``.
        OSError: If the file cannot be opened.
    """
    en2zh, zh2en = {}, {}
    # 'utf-8-sig' reads plain UTF-8 unchanged but drops a leading byte-order
    # mark, which would otherwise become part of the first key.
    with open(xtrans_path, 'r', encoding='utf-8-sig') as f:
        for xlineno, xraw in enumerate(f, start=1):
            # Text mode already translates every line ending to '\n'.
            xline = xraw.rstrip('\n')
            if not xline.strip():
                # Tolerate empty lines (e.g. a trailing one at end of file).
                continue
            xparts = xline.split('>')
            if len(xparts) != 2:
                raise ValueError(
                    '%s, line %d: expected exactly one ">" separator, got %r'
                    % (xtrans_path, xlineno, xline)
                )
            xen, xzh = xparts
            en2zh[xen] = xzh
            zh2en[xzh] = xen
    return en2zh, zh2en


if '__main__' == __name__:
    # Script entry point: load the label translation table, read the
    # tab-separated dataset, and write a copy whose label column is replaced
    # by its translation.
    import os  # local import: only this script section needs it

    xtrans_path = r'D:\_const\svn\aliyun\cmpltrtok\content\python_nlp\zhuan_gao6\trans\tte_options_zh.trans.txt'
    en2zh, zh2en = get_trans_maps(xtrans_path)
    print(en2zh)
    print(zh2en)

    xcsv_path = r'D:\_dell7590_root\local\LNP_datasets\OCEMOTION-中文7分类细粒度情感分析数据集\OCEMOTION.csv'
    # No header row, so the columns are labelled 0, 1, 2, ...
    df = pd.read_csv(xcsv_path, delimiter='\t', header=None)
    print(df[:5])
    print(df[2].value_counts())

    xwashed_path = r'_save\washed\emotion_pandas_washed.txt'
    # Create the output directory up front so a fresh checkout does not fail
    # on open(). dirname() is empty when the path has no directory part.
    xwashed_dir = os.path.dirname(xwashed_path)
    if xwashed_dir:
        os.makedirs(xwashed_dir, exist_ok=True)

    xwritten = 0
    xskipped = 0
    with open(xwashed_path, 'w', encoding='utf8') as f:
        # itertuples(name=None) yields plain tuples: (index, col0, col1, ...).
        for index, *xfields in df.itertuples(name=None):
            try:
                xid, xtext, xem = xfields
                # join() raises TypeError for a non-string text (e.g. a
                # missing value), so nothing partial is ever written.
                xout = '\t'.join((str(int(xid)), xtext, en2zh[xem])) + '\n'
            except Exception:
                # Best effort: report the offending row and keep going.
                xskipped += 1
                print('Skipped row %r:' % (index,))
                print(traceback.format_exc())
            else:
                f.write(xout)
                xwritten += 1
    print('rows written: %d, rows skipped: %d' % (xwritten, xskipped))